Aminet 1 (Walnut Creek)

home *** CD-ROM | disk | FTP | other *** search

/ Aminet 1 (Walnut Creek) / Aminet - June 1993 [Walnut Creek].iso / usenet / sources / volume90 / util / bsindex1 / part01 / src / expression.c < prev next >

Wrap

C/C++ Source or Header | 1990-02-02 | 11KB | 520 lines

/* * EXPRESSION.C * * This module contains the routines needed to parse and evaluate an * expression. This is used while selecting which files to print. * * This is the BNF for the set of expressions recognised by the * database searcher. It doesn't contain any semantic information however. * * Expression ::= Boolean | Boolean Operator Expression | ALL * Operator ::= AND | OR * Boolean ::= NOT Boolean | * '(' Expression ')' | * BoolIdent | * NumIdent Op Value | * StringIdent Op String | * DateIdent Op Date * Op ::= '<' | '>' | '<=' | '>=' | '=' | '<>' * BoolIdent ::= BINARY | ONLINE | LOCAL | VALID * StringIdent ::= COMMENT | DISKNAME | NAME | OWNER | PATHNAME * NumIdent ::= ACCESS | SECTION | DIRECTORY | DISKDIRNUM | * SIZE | KSIZE * DateIdent ::= DATE * String ::= (Text enclosed in quotes) * Value ::= (A number) * Date ::= (A date in the format "dd-mon-yy") * */ #ifndef LATTICE_50 #include "system.h" #endif #include "bbsindex.h" #define mystrcmp stricmp /* Make match() insensitive to case */ /* * BNF procedures */ void bnf_op(), bnf_date(), bnf_expression(), bnf_boolean(); /* * Global variable(s) */ static int treepos; /* Next free entry in tree array */ static int curtoken; /* Current token */ static int curvalue; /* Current number with E_NUMBER */ static char curstring[MAXCOM]; /* Current string with E_STRING */ /* * Tokens recognised as special in expressions */ struct { int tag; char *name; } tokens[] = { { E_EQ, "=" }, { E_NE, "<>" }, { E_LE, "<=" }, { E_GE, ">=" }, { E_LT, "<" }, { E_GT, ">" }, { E_AND, "AND" }, { E_OR, "OR" }, { E_NOT, "NOT" }, { E_ALL, "ALL" }, { E_OPENPAR, "(" }, { E_CLOSEPAR, ")" }, { E_TEXT, "TEXT" }, { E_REMOTE, "REMOTE" }, { E_INVALID, "INVALID" }, { E_OFFLINE, "OFFLINE" }, { NULL, NULL } }; /* * wild() * ------ * This routine does a wildcard match on the two specified strings. * The first string contains the string to check, and the second string * containts the wildcard pattern to check against. The special wild * card characters are '?' which matches any single characters, and * '*' which matches any number of characters. 0 is returned if the * the two strings match, else a +ve or -ve number. * */ int wild(s,w) char *s; char *w; { char *p; char ch; while (*w && *s) { switch (*w) { case '*': ch = toupper(w[1]); if (!ch) return (0); for (p = s; *p; p++) { if (toupper(*p) == ch || ch == '?') { if (!wild(p,w+1)) return (0); } } return (toupper(*p)-ch); case '?': break; default: if (toupper(*s) != toupper(*w)) return (toupper(*s)-toupper(*w)); } w++; s++; } return (toupper(*s)-toupper(*w)); } /* * Dirty great macro to compare two values according to an operator */ #define e_cmp(v1,op,v2) \ switch (op) { \ case E_EQ: return ((v1) == (v2)); \ case E_NE: return ((v1) != (v2)); \ case E_LT: return ((v1) < (v2)); \ case E_GT: return ((v1) > (v2)); \ case E_LE: return ((v1) <= (v2)); \ case E_GE: return ((v1) >= (v2)); \ } #define e_numcmp(n) e_cmp(n, e->op, e->num) #define e_strcmp(s) e_cmp(mystrcmp(s,e->text), e->op, 0) #define e_wildcmp(s) e_cmp(wild(s,e->text), e->op, 0) /* * match() * ------- * Scans the the parse tree, and returns TRUE if the current * record matches the criteria in the tree, else FALSE. * * Aside: Aren't C macros wonderful? Just think how long this function * would be if it wasn't for the above e_cmp, e_numcmp and e_strcmp * macros. */ int match(p, e) UDHEAD *p; EXPR *e; { switch (e->field) { case E_AND: return (match(p, e->left) && match(p, e->right)); case E_OR: return (match(p, e->left) || match(p, e->right)); case E_NOT: return (!match(p, e->left)); case E_ALL: return (TRUE); case I_LOCAL: return ((int)p->local); case I_BINARY: return ((int)p->bin); case I_VALID: return ((int)p->valid); case I_ONLINE: return ((int)p->online); case E_REMOTE: return (!(int)p->local); case E_TEXT: return (!(int)p->bin); case E_INVALID: return (!(int)p->valid); case E_OFFLINE: return (!(int)p->online); case I_ACCESS: e_numcmp(p->accesses); case I_DATE: e_numcmp(p->date); case I_DIRECTORY: e_numcmp(p->dir); case I_DISKDIRNUM: e_numcmp(p->dirnum); case I_KSIZE: e_numcmp(BTOK(p->length)); case I_SECTION: e_numcmp(p->section); case I_SIZE: e_numcmp(p->length); case I_DISKNAME: e_strcmp(p->disk_name); case I_COMMENT: e_strcmp(p->desc); case I_NAME: e_strcmp(p->cat_name); case I_OWNER: e_strcmp(p->owner); case W_DISKNAME: e_wildcmp(p->disk_name); case W_COMMENT: e_wildcmp(p->desc); case W_NAME: e_wildcmp(p->cat_name); case W_OWNER: e_wildcmp(p->owner); } } /* * newnode() * --------- * This function allocates a new node from the tree array, and returns * a pointer to it. If overflow occurs, it aborts with an error message. */ EXPR *newnode() { treepos++; if (treepos >= MAXEXPR) { scripterror("expression to complex\n"); Cleanup(10); } return (&tree[treepos]); } /* * readtoken() * ----------- * This function reads the next token from the command line, starting * at position curpos. curtoken is set to the type of token received. * If it was E_STRING, then curstring points to the string it * represents (without the quotes). If it was E_NUM, then curvalue * points to the number represented. If the end of the line is * reached, E_END is automatically set. */ #define nextch() (ch = combuf[compos++]) #define ungetnext() (compos--) void readtoken() { char *p = curstring, ch; int i; if (compos >= comlen) { curtoken = E_END; return; } do { nextch(); } while (ch == CHAR_SPACE); if (ch == CHAR_QUOTES) { /* Handle string in quotes */ curtoken = E_STRING; nextch(); if (*p == CHAR_NULL) { curtoken = E_END; return; } do { *p++ = ch; nextch(); } while (ch && ch != CHAR_QUOTES); *p = CHAR_NULL; return; } if (isdigit(ch)) { /* Numeric value? */ curtoken = E_NUMBER; curvalue = 0; do { curvalue = curvalue * 10 + (ch - '0'); nextch(); } while (isdigit(ch)); ungetnext(); return; } if (isalpha(ch)) { /* Alphabetic identifier? */ do { *p++ = ch; nextch(); } while (isalpha(ch)); } else if (ch == CHAR_NULL) { /* At end of line? */ curtoken = E_END; return; } else { /* Must be punctuation */ do { *p++ = ch; nextch(); } while (ch && !isalpha(ch) && !isdigit(ch) && ch != CHAR_SPACE && ch != CHAR_QUOTES); } *p = CHAR_NULL; ungetnext(); /* Check for character A - F */ if (curstring[1] == CHAR_NULL) { switch (curstring[0]) { case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': curvalue = curstring[0] + 10 - 'A'; curtoken = E_NUMBER; return; } } /* Now find token that matches string */ for (i = 0; i < MAXINDEX; i++) { if (!strcmp(curstring, indexes[i].name)) { curtoken = indexes[i].tag; return; } } for (i = 0; tokens[i].name; i++) { if (!strcmp(curstring, tokens[i].name)) { curtoken = tokens[i].tag; return; } } scripterror("unrecognised symbol '"); print2(curstring,"' in expression\n"); Cleanup(10); } /* * com_select() * ------------ * This command parses the expression in the command buffer starting * at position compos, and builds an expression tree representing it. * This tree is stored in tree[], starting at element 0. This expression * tree is used when match() is called. */ void com_select() { treepos = 0; readtoken(); bnf_expression(tree); } /* * bnf_expression() * ---------------- * Parses the BNF line: * * Expression ::= Boolean | Boolean Operator Expression | ALL * */ void bnf_expression(e) EXPR *e; { EXPR dummy; if (curtoken == E_ALL) e->field = E_ALL; else { bnf_boolean(&dummy); if (curtoken == E_AND || curtoken == E_OR) { e->field = curtoken; e->left = newnode(); e->right = newnode(); *(e->left) = dummy; readtoken(); bnf_expression(e->right); } else *e = dummy; } } /* * bnf_boolean() * ------------- * Parses the following BNF line: * * Boolean ::= NOT Boolean | * '(' Expression ')' | * BoolIdent | * NumIdent Op Value | * StringIdent Op String | * DateIdent Op Date */ void bnf_boolean(e) EXPR *e; { char *p; e->field = curtoken; switch (curtoken) { case E_NOT: e->left = newnode(); readtoken(); bnf_boolean(e->left); break; case E_OPENPAR: readtoken(); bnf_expression(e); if (curtoken != E_CLOSEPAR) { scripterror("missing close parenthesis\n"); Cleanup(10); } readtoken(); break; case I_BINARY: case I_VALID: case I_ONLINE: case I_LOCAL: case E_TEXT: case E_INVALID: case E_OFFLINE: case E_REMOTE: readtoken(); break; case I_ACCESS: case I_SECTION: case I_DIRECTORY: case I_DISKDIRNUM: case I_SIZE: case I_KSIZE: readtoken(); bnf_op(e); if (curtoken != E_NUMBER) { scripterror("number expected in expression\n"); Cleanup(10); } e->num = curvalue; readtoken(); break; case I_COMMENT: case I_DISKNAME: case I_NAME: case I_OWNER: case I_PATHNAME: readtoken(); bnf_op(e); if (curtoken != E_STRING) { scripterror("string expected in expression\n"); Cleanup(10); } e->text = mymalloc(strlen(curstring)+1); strcpy(e->text, curstring); /* * Now check the string, and see if it contains any wildcard * characters. If it does, use operation W_{string} * instead of E_{string}. If it doesn't, use the standard * strcmp which is much faster. */ for (p = curstring; *p; *p++) { if (*p == '*' || *p == '?') { e->field = tokentowild(e->field); break; } } readtoken(); break; case I_DATE: readtoken(); bnf_op(e); bnf_date(e); break; default: scripterror("error in expression\n"); Cleanup(10); } } /* * bnf_op * ------ * Sets the operation field of the specified expression to the value * of the next token on the command line. */ void bnf_op(e) EXPR *e; { switch (curtoken) { case E_EQ: case E_NE: case E_LT: case E_GT: case E_LE: case E_GE: e->op = curtoken; readtoken(); break; default: scripterror("expected comparison operator\n"); Cleanup(10); } } /* * bnf_date() * ----------- * Reads a date from the command line, validates it, and sets the * 'num' field in the expression to its numerical value. The date * is in the format "dd-mon-yy", e.g. "15-Jun-89" */ void bnf_date(e) EXPR *e; { int day, month, year; if (curtoken != E_STRING) { scripterror("expected date string in expression\n"); Cleanup(10); } if (strlen(curstring) != 9 || curstring[2] != '-' || curstring[6] != '-') { scripterror("Invalid date format (should be dd-mon-yy)\n"); Cleanup(10); } day = atoi(curstring); year = atoi(curstring+7); for (month = 0; month < 12 && strnicmp(curstring+3, months[month], 3); month++) ; if ((day < 1 || day > 31) || (month < 1 || month > 12) || (year < 1 || year > 99)) { scripterror("Invalid date format (should be dd-mon-yy)\n"); Cleanup(10); } e->num = (((year * 13) + month) << 5) + day; readtoken(); }